## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following object is masked from 'package:lubridate':
##
## here
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: Matrix
## Loading required package: lme4
## Loading required package: Rcpp
##
## arm (Version 1.7-07, built: 2014-8-27)
##
## Working directory is /Users/andreuboada/Dropbox/ESTADISTICA/itam-dm/alumnos/equipos/RandomBuddies
##
##
## Attaching package: 'arm'
##
## The following object is masked from 'package:scales':
##
## rescale
##
## Rattle: A free graphical interface for data mining with R.
## Versión 3.3.0 Copyright (c) 2006-2014 Togaware Pty Ltd.
## Escriba 'rattle()' para agitar, sacudir y rotar sus datos.
## [1] 15
## Warning: Removed 1 rows containing missing values (stat_qq).
## Warning: Removed 1 rows containing missing values (stat_qq).
## Warning: Removed 8 rows containing missing values (stat_qq).
## Warning: Removed 1 rows containing missing values (stat_qq).
## Warning: Removed 10 rows containing missing values (stat_qq).
## Warning: Removed 2 rows containing missing values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (stat_smooth).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 1 rows containing non-finite values (stat_density).
## Warning: Removed 9 rows containing missing values (stat_smooth).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 1 rows containing non-finite values (stat_density).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (stat_smooth).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 8 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 11 rows containing missing values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 1 rows containing non-finite values (stat_density).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 10 rows containing non-finite values (stat_density).
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
Como el dinero no alcanza, tomas otro trabajo rápido para una ONG. Quieren predecir la concentración de algas en ríos de la región. Tomaron datos durante un año.
Cada observación es el resultado de agregar varias muestras de agua recolectadas en el mismo río durante un periodo de 3 meses, en la misma estación del año.
El set contiene 18 atributos en el siguiente orden:
| campo | atributo | valores | tipo |
|---|---|---|---|
| 1 | temporada | spring, summer, autumn, winter | categórica |
| 2 | tamaño del río | small, medium, large | categórica |
| 3 | velocidad | low, medium, high | categórica |
| 4-11 | concentraciones químicas | Reales positivos | continua |
| 12-18 | distribución de diferentes tipos de algas | Reales positivos | continua |
Los NA están codificados como XXXXXXX.
ds
## temporada tamano velocidad mxPH mnO2 Cl NO3 NO4
## 1 winter small medium 8.000 9.80 60.800 6.238 578.000
## 2 spring small medium 8.350 8.00 57.750 1.288 370.000
## 3 autumn small medium 8.100 11.40 40.020 5.330 346.667
## 4 spring small medium 8.070 4.80 77.364 2.302 98.182
## 5 autumn small medium 8.060 9.00 55.350 10.416 233.700
## 6 winter small high 8.250 13.10 65.750 9.248 430.000
## 7 summer small high 8.150 10.30 73.250 1.535 110.000
## 8 autumn small high 8.050 10.60 59.067 4.990 205.667
## 9 winter small medium 8.700 3.40 21.950 0.886 102.750
## 10 winter small high 7.930 9.90 8.000 1.390 5.800
## 11 spring small high 7.700 10.20 8.000 1.527 21.571
## 12 summer small high 7.450 11.70 8.690 1.588 18.429
## 13 winter small high 7.740 9.60 5.000 1.223 27.286
## 14 summer small high 7.720 11.80 6.300 1.470 8.000
## 15 winter small high 7.900 9.60 3.000 1.448 46.200
## 16 autumn small high 7.550 11.50 4.700 1.320 14.750
## 17 winter small high 7.780 12.00 7.000 1.420 34.333
## 18 spring small high 7.610 9.80 7.000 1.443 31.333
## 19 summer small high 7.350 10.40 7.000 1.718 49.000
## 20 spring small medium 7.790 3.20 64.000 2.822 8777.600
## 21 winter small medium 7.830 10.70 88.000 4.825 1729.000
## 22 spring small high 7.200 9.20 0.800 0.642 81.000
## 23 autumn small high 7.750 10.30 32.920 2.942 42.000
## 24 winter small high 7.620 8.50 11.867 1.715 208.333
## 25 spring small high 7.840 9.40 10.975 1.510 12.500
## 26 summer small high 7.770 10.70 12.536 3.976 58.500
## 27 winter small high 7.090 8.40 10.500 1.572 28.000
## 28 autumn small high 6.800 11.10 9.000 0.630 20.000
## 29 winter small high 8.000 9.80 16.000 0.730 20.000
## 30 spring small high 7.200 11.30 9.000 0.230 120.000
## 31 autumn small high 7.400 12.50 13.000 3.330 60.000
## 32 winter small high 8.100 10.30 26.000 3.780 60.000
## 33 summer small high 7.800 11.30 20.083 3.020 49.500
## 34 autumn small medium 8.400 9.90 34.500 2.818 3515.000
## 35 winter small medium 8.270 7.80 29.200 0.050 6400.000
## 36 summer small medium 8.660 8.40 30.523 3.444 1911.000
## 37 winter small high 8.300 10.90 1.170 0.735 13.500
## 38 spring small high 8.000 NA 1.450 0.810 10.000
## 39 winter small medium 8.300 8.90 20.625 3.414 228.750
## 40 spring small medium 8.100 10.50 22.286 4.071 178.570
## 41 winter small medium 8.000 5.50 77.000 6.096 122.850
## 42 summer small medium 8.150 7.10 54.190 3.829 647.570
## 43 winter small high 8.300 7.70 50.000 8.543 76.000
## 44 spring small high 8.300 8.80 54.143 7.830 51.429
## 45 winter small high 8.400 13.40 69.750 4.555 37.500
## 46 spring small high 8.300 12.50 87.000 4.870 22.500
## 47 autumn small high 8.000 12.10 66.300 4.535 39.000
## 48 winter small low NA 12.60 9.000 0.230 10.000
## 49 spring small medium 7.600 9.60 15.000 3.020 40.000
## 50 autumn small medium 7.290 11.21 17.750 3.070 35.000
## 51 winter small medium 7.600 10.20 32.300 4.508 192.500
## 52 summer small medium 8.000 7.90 27.233 1.651 28.333
## 53 winter small high 7.900 11.00 6.167 1.172 18.333
## 54 spring small high 7.900 9.00 5.273 0.910 33.636
## 55 winter small high 6.600 10.80 NA 3.245 10.000
## 56 spring small medium 5.600 11.80 NA 2.220 5.000
## 57 autumn small medium 5.700 10.80 NA 2.550 10.000
## 58 spring small high 6.600 9.50 NA 1.320 20.000
## 59 summer small high 6.600 10.80 NA 2.640 10.000
## 60 autumn small medium 6.600 11.30 NA 4.170 10.000
## 61 spring small medium 6.500 10.40 NA 5.970 10.000
## 63 autumn small high 7.830 11.70 4.083 1.328 18.000
## 64 spring small high 7.570 10.80 4.575 1.203 27.500
## 65 summer small high 7.190 11.70 4.326 1.474 160.000
## 66 winter small high 7.440 10.10 2.933 0.770 15.000
## 67 spring small high 7.140 9.80 3.275 0.923 15.000
## 68 summer small high 7.000 12.10 3.136 1.208 16.200
## 69 winter small medium 7.500 1.50 32.400 0.921 1386.250
## 70 spring small medium 7.500 1.80 29.775 1.051 2082.850
## 71 summer small medium 7.800 7.10 32.540 1.720 2167.370
## 72 autumn medium medium 8.500 8.10 38.125 3.850 225.000
## 73 summer medium medium 7.925 10.20 34.037 9.080 109.000
## 74 winter medium medium 8.100 8.10 136.000 3.773 245.000
## 75 spring medium medium 8.200 6.80 129.375 3.316 271.250
## 76 spring medium high 9.100 9.40 35.750 5.164 32.500
## 77 autumn medium medium 8.100 9.80 29.500 1.287 224.286
## 78 winter medium medium 8.000 5.90 27.400 0.735 133.636
## 79 spring medium medium 8.000 3.30 26.760 0.658 165.000
## 80 winter medium high 7.500 9.20 11.000 3.310 101.000
## 81 spring medium high 7.400 9.80 11.000 3.235 255.000
## 82 autumn medium high 7.300 11.70 10.400 4.930 130.000
## 83 winter medium high 7.400 8.90 13.500 5.442 123.333
## 84 summer medium high 7.400 11.17 12.146 6.188 89.600
## 85 autumn medium medium 7.500 10.80 31.000 4.408 737.500
## 86 winter medium medium 7.600 6.00 53.000 3.734 914.000
## 87 summer medium medium 7.400 10.77 36.248 3.730 429.200
## 88 winter medium medium 7.800 3.60 48.667 4.030 5738.330
## 89 summer medium medium 7.600 9.70 53.102 7.160 4073.330
## 90 winter medium medium 8.500 8.60 125.600 3.778 124.167
## 91 spring medium medium 8.700 9.40 173.750 3.318 101.250
## 92 summer medium medium 8.100 10.70 94.405 4.698 153.000
## 93 winter medium high 8.800 8.50 53.333 5.132 96.667
## 94 spring medium high 7.800 10.50 70.000 2.443 98.333
## 95 summer medium high 7.900 11.80 63.510 4.940 137.000
## 96 autumn medium low 8.500 10.50 56.717 0.330 215.714
## 97 winter medium low 9.100 5.40 61.050 0.308 105.556
## 98 spring medium low 8.900 4.50 57.750 0.267 155.000
## 99 winter medium high 7.900 6.30 101.875 3.978 153.750
## 100 summer medium high 7.800 8.20 85.982 6.200 421.667
## 101 winter medium medium 7.700 7.10 63.625 3.140 122.500
## 102 spring medium medium 7.800 6.50 82.111 2.603 215.556
## 103 winter medium low 7.700 5.30 65.333 2.899 371.111
## 104 summer medium low 7.500 8.80 58.331 8.688 758.750
## 105 autumn medium low 7.600 10.00 49.625 5.456 308.750
## 106 winter medium low 8.700 7.40 47.778 2.316 38.111
## 107 summer medium low 7.700 11.10 47.229 8.759 239.000
## 108 autumn medium high 8.300 11.10 41.500 4.665 931.833
## 109 winter medium high 8.430 6.00 40.167 2.670 723.667
## 110 summer medium high 8.160 11.10 32.056 5.694 461.875
## 111 winter medium high 8.700 9.80 5.889 1.534 51.111
## 112 spring medium high 8.200 11.30 7.250 1.875 25.000
## 113 summer medium high 8.500 11.80 7.838 1.732 206.538
## 114 spring medium medium 7.800 6.00 53.425 0.381 118.571
## 115 summer medium medium 8.000 9.70 57.848 0.461 217.750
## 116 winter medium high 9.700 10.80 0.222 0.406 10.000
## 117 summer medium high 8.600 11.62 1.549 0.445 25.833
## 118 autumn medium medium 8.300 11.60 5.830 0.701 12.727
## 119 spring medium low 8.400 5.30 74.667 3.900 131.667
## 120 summer medium low 8.200 6.60 131.400 4.188 92.000
## 121 winter medium medium 8.200 9.40 45.273 7.195 345.455
## 122 spring medium medium 8.100 7.10 42.636 5.078 56.364
## 123 summer medium medium 8.100 9.00 48.429 6.640 128.571
## 124 winter medium high 7.400 10.70 11.818 2.163 170.909
## 125 spring medium high 8.300 9.70 10.556 1.921 65.556
## 126 summer medium high 8.600 10.70 12.000 2.231 43.750
## 127 winter medium medium 9.100 11.60 31.091 5.099 246.364
## 128 spring medium medium 9.000 6.90 28.333 2.954 76.667
## 129 summer medium medium 8.300 10.00 30.125 3.726 102.500
## 130 winter medium high 8.500 10.10 10.936 1.335 236.000
## 131 spring medium high 8.300 7.70 10.078 1.212 103.333
## 132 summer medium high 7.300 10.50 11.088 1.374 92.375
## 133 winter medium medium 7.900 9.80 194.750 6.513 3466.660
## 134 spring medium medium 7.900 8.30 391.500 6.045 380.000
## 135 autumn medium medium 8.000 11.90 130.670 6.540 196.000
## 136 spring medium medium 8.000 9.20 39.000 4.860 120.000
## 137 autumn medium medium 8.100 11.70 35.660 5.130 46.500
## 138 winter medium low 8.430 9.90 37.600 0.826 124.000
## 139 summer medium low 8.100 6.20 39.000 0.673 112.857
## 140 winter medium medium 7.900 11.20 49.900 9.773 505.000
## 141 summer medium medium 8.100 6.20 51.113 5.099 175.000
## 142 spring medium high 7.800 9.50 8.300 1.670 34.000
## 143 autumn medium high 7.900 10.50 10.207 2.304 132.250
## 144 winter medium low 8.000 4.50 79.077 8.984 920.000
## 145 spring medium low 7.600 6.30 81.333 9.715 196.667
## 146 autumn medium low 7.800 6.50 64.093 7.740 1990.160
## 147 winter medium high 8.220 8.10 41.250 1.415 172.500
## 148 autumn medium high 8.300 9.90 40.226 1.587 235.000
## 149 winter medium high 8.470 9.00 46.167 2.102 84.667
## 150 spring medium high 8.400 4.90 47.000 0.536 91.833
## 151 autumn medium high 8.870 11.00 41.163 2.273 54.750
## 152 summer medium high 7.700 4.40 53.000 2.310 90.000
## 153 autumn medium high 7.300 11.80 44.205 45.650 24064.000
## 154 spring medium medium 7.900 6.00 127.833 2.680 176.667
## 155 autumn medium medium 7.800 10.53 100.830 5.410 486.500
## 156 spring large low 7.800 3.20 94.000 4.908 1131.660
## 157 summer large low 7.600 4.90 69.000 3.685 1495.000
## 158 spring large low 8.600 3.60 50.000 0.376 134.000
## 159 autumn large low 8.400 10.60 19.220 1.655 96.833
## 160 winter large low 8.300 11.50 26.000 1.870 62.500
## 161 spring large low 9.000 5.80 NA 0.900 142.000
## 162 spring large low 9.500 5.70 44.000 0.102 146.667
## 163 summer large low 8.800 8.80 43.000 0.130 103.333
## 164 autumn large low 8.840 12.90 43.090 0.846 52.200
## 165 winter large high 7.300 9.90 16.000 4.820 101.667
## 166 autumn large high 7.400 10.68 22.350 5.414 244.600
## 167 spring large low 9.100 4.30 82.857 0.860 137.273
## 168 autumn large low 8.530 11.10 63.292 1.726 227.600
## 169 winter large low 8.560 8.70 43.970 4.053 643.000
## 170 autumn large low 8.060 8.30 38.902 3.678 627.273
## 171 winter large medium 8.240 6.10 95.367 3.561 1168.000
## 172 summer large medium 7.910 6.20 151.833 3.923 1081.660
## 173 winter large medium 8.210 9.30 104.818 3.908 124.364
## 174 spring large medium 8.500 7.30 71.444 2.512 66.667
## 175 spring large medium 8.600 10.60 208.364 4.459 197.909
## 176 winter large medium 9.060 6.35 187.183 3.351 54.778
## 177 autumn large high 8.700 10.70 4.545 0.941 32.727
## 178 spring large high 8.100 10.70 3.500 1.013 12.500
## 179 summer large high 8.400 10.29 5.326 0.996 53.846
## 180 spring large medium 8.600 10.10 2.111 0.663 11.111
## 181 summer large medium 8.200 9.50 2.200 0.672 10.000
## 182 winter large medium 8.500 10.50 2.750 0.758 10.500
## 183 summer large medium 8.300 10.00 3.860 0.866 32.000
## 184 winter large high 8.000 10.90 9.055 0.825 40.000
## 185 summer large high 8.100 10.20 7.613 0.699 32.500
## 186 winter large low 8.700 10.80 39.109 6.225 161.818
## 187 winter large low 8.700 11.70 22.455 3.765 88.182
## 188 summer large low 8.400 8.20 23.250 2.805 43.750
## 189 autumn large low 8.550 11.00 22.320 3.140 82.100
## 190 spring large medium 8.500 7.60 12.778 1.873 17.778
## 191 autumn large medium 8.700 11.40 15.541 2.323 103.000
## 192 winter large medium 8.400 10.50 12.182 1.519 65.455
## 193 spring large medium 8.200 8.20 7.333 1.003 37.778
## 194 autumn large medium 8.580 11.10 23.825 3.617 72.600
## 195 summer large medium 8.500 7.90 12.444 2.586 96.667
## 196 autumn large medium 8.400 8.40 17.375 3.833 83.750
## 197 spring large medium 8.300 10.60 14.320 3.200 125.333
## 198 autumn large medium 8.200 7.00 139.989 2.978 60.110
## 200 summer large medium 8.500 6.70 82.852 2.800 27.069
## oPO4 PO4 Chla a1 a2 a3 a4 a5 a6 a7
## 1 105.000 170.000 50.000 0.0 0.0 0.0 0.0 34.2 8.3 0.0
## 2 428.750 558.750 1.300 1.4 7.6 4.8 1.9 6.7 0.0 2.1
## 3 125.667 187.057 15.600 3.3 53.6 1.9 0.0 0.0 0.0 9.7
## 4 61.182 138.700 1.400 3.1 41.0 18.9 0.0 1.4 0.0 1.4
## 5 58.222 97.580 10.500 9.2 2.9 7.5 0.0 7.5 4.1 1.0
## 6 18.250 56.667 28.400 15.1 14.6 1.4 0.0 22.5 12.6 2.9
## 7 61.250 111.750 3.200 2.4 1.2 3.2 3.9 5.8 6.8 0.0
## 8 44.667 77.434 6.900 18.2 1.6 0.0 0.0 5.5 8.7 0.0
## 9 36.300 71.000 5.544 25.4 5.4 2.5 0.0 0.0 0.0 0.0
## 10 27.250 46.600 0.800 17.0 0.0 0.0 2.9 0.0 0.0 1.7
## 11 12.750 20.750 0.800 16.6 0.0 0.0 0.0 1.2 0.0 6.0
## 12 10.667 19.000 0.600 32.1 0.0 0.0 0.0 0.0 0.0 1.5
## 13 12.000 17.000 41.000 43.5 0.0 2.1 0.0 1.2 0.0 2.1
## 14 16.000 15.000 0.500 31.1 1.0 3.4 0.0 1.9 0.0 4.1
## 15 13.000 61.600 0.300 52.2 5.0 7.8 0.0 4.0 0.0 0.0
## 16 4.250 98.250 1.100 69.9 0.0 1.7 0.0 0.0 0.0 0.0
## 17 18.667 50.000 1.100 46.2 0.0 0.0 1.2 0.0 0.0 0.0
## 18 20.000 57.833 0.400 31.8 0.0 3.1 4.8 7.7 1.4 7.2
## 19 41.500 61.500 0.800 50.6 0.0 9.9 4.3 3.6 8.2 2.2
## 20 564.600 771.600 4.500 0.0 0.0 0.0 44.6 0.0 0.0 1.4
## 21 467.500 586.000 16.000 0.0 0.0 0.0 6.8 6.1 0.0 0.0
## 22 15.600 18.000 0.500 15.5 0.0 0.0 2.3 0.0 0.0 0.0
## 23 16.000 40.000 7.600 23.2 0.0 0.0 0.0 27.6 11.1 0.0
## 24 3.000 27.500 1.700 74.2 0.0 0.0 3.7 0.0 0.0 0.0
## 25 3.000 11.500 1.500 13.0 8.6 1.2 3.5 1.2 1.6 1.9
## 26 9.000 44.136 3.000 4.1 0.0 0.0 0.0 9.2 10.1 0.0
## 27 4.000 13.600 0.500 29.7 0.0 0.0 4.9 0.0 0.0 0.0
## 28 4.000 NA 2.700 30.3 1.9 0.0 0.0 2.1 1.4 2.1
## 29 26.000 45.000 0.800 17.1 0.0 19.6 0.0 0.0 0.0 2.5
## 30 12.000 19.000 0.500 33.9 1.0 14.6 0.0 0.0 0.0 0.0
## 31 72.000 142.000 4.900 3.4 16.0 1.2 0.0 15.3 15.8 0.0
## 32 246.000 304.000 2.800 6.9 17.1 20.2 0.0 4.0 0.0 2.9
## 33 53.000 130.750 5.800 0.0 8.0 1.9 0.0 11.2 42.7 1.2
## 34 20.000 47.000 2.300 13.6 9.1 0.0 0.0 1.4 0.0 0.0
## 35 7.400 23.000 0.900 5.3 40.7 3.3 0.0 0.0 0.0 1.9
## 36 58.875 84.460 3.600 18.3 12.4 1.0 0.0 0.0 0.0 1.0
## 37 1.625 3.000 0.200 66.0 0.0 0.0 0.0 0.0 0.0 0.0
## 38 2.500 3.000 0.300 75.8 0.0 0.0 0.0 0.0 0.0 0.0
## 39 196.620 253.250 12.320 2.0 38.5 4.1 2.2 0.0 0.0 10.2
## 40 182.420 255.280 8.957 2.2 2.7 1.0 3.7 2.7 0.0 0.0
## 41 143.710 296.000 3.700 0.0 5.9 10.6 1.7 0.0 0.0 7.1
## 42 59.429 175.046 13.200 0.0 0.0 0.0 5.7 11.3 17.0 1.6
## 43 264.900 344.600 22.500 0.0 40.9 7.5 0.0 2.4 1.5 0.0
## 44 276.850 326.857 11.840 4.1 3.1 0.0 0.0 19.7 17.0 0.0
## 45 10.000 40.667 3.900 51.8 4.1 0.0 0.0 3.1 5.5 0.0
## 46 27.000 43.500 3.300 29.5 1.0 2.7 3.2 2.9 9.6 0.0
## 47 16.000 39.000 0.800 54.4 3.4 1.2 0.0 18.7 2.0 0.0
## 48 5.000 6.000 1.100 35.5 0.0 0.0 0.0 0.0 0.0 0.0
## 49 27.000 121.000 2.800 89.8 0.0 0.0 0.0 0.0 0.0 0.0
## 50 13.000 20.812 12.100 24.8 7.4 0.0 2.5 10.6 17.1 3.2
## 51 12.750 49.333 7.900 0.0 0.0 0.0 4.6 1.2 0.0 3.9
## 52 7.300 22.900 4.500 39.1 0.0 1.2 2.2 5.4 1.5 3.2
## 53 7.750 11.800 0.500 81.9 0.0 0.0 0.0 0.0 0.0 0.0
## 54 9.000 11.818 0.800 54.0 0.0 0.0 2.4 0.0 0.0 0.0
## 55 1.000 6.500 NA 24.3 0.0 0.0 0.0 0.0 0.0 0.0
## 56 1.000 1.000 NA 82.7 0.0 0.0 0.0 0.0 0.0 0.0
## 57 1.000 4.000 NA 16.8 4.6 3.9 11.5 0.0 0.0 0.0
## 58 1.000 6.000 NA 46.8 0.0 0.0 28.8 0.0 0.0 0.0
## 59 2.000 11.000 NA 46.9 0.0 0.0 13.4 0.0 0.0 0.0
## 60 1.000 6.000 NA 47.1 0.0 0.0 0.0 0.0 1.2 0.0
## 61 2.000 14.000 NA 66.9 0.0 0.0 0.0 0.0 0.0 0.0
## 63 3.333 6.667 NA 14.4 0.0 0.0 0.0 0.0 0.0 0.0
## 64 2.000 6.750 1.000 20.3 4.3 5.5 0.0 0.0 0.0 1.4
## 65 2.500 7.200 0.300 15.8 1.7 7.8 0.0 0.0 2.4 1.4
## 66 1.333 6.000 0.600 55.5 0.0 1.7 1.4 0.0 0.0 0.0
## 67 1.250 10.750 2.500 10.3 0.0 42.8 2.2 0.0 0.0 0.0
## 68 1.800 2.500 0.500 64.2 0.0 3.0 0.0 0.0 0.0 0.0
## 69 220.750 351.600 10.000 0.0 0.0 1.5 7.6 0.0 0.0 6.1
## 70 209.857 313.600 1.000 1.9 4.9 2.6 3.0 0.0 0.0 1.9
## 71 151.125 279.066 13.100 25.5 3.9 1.0 11.0 0.0 0.0 12.5
## 72 45.000 152.333 5.200 11.3 1.7 2.0 2.2 13.3 10.6 0.0
## 73 55.000 58.623 11.600 4.4 4.0 3.3 0.0 11.7 21.4 1.2
## 74 136.750 249.250 20.870 1.9 5.8 24.8 4.6 9.5 5.1 1.2
## 75 100.000 233.500 13.000 1.6 8.0 17.6 3.7 11.5 7.0 0.0
## 76 85.500 215.500 18.370 2.2 9.6 5.0 1.0 8.6 7.9 2.2
## 77 25.167 102.333 3.600 64.9 1.0 0.0 1.0 2.9 1.4 1.0
## 78 36.000 105.727 3.000 15.1 7.3 23.2 3.4 4.1 0.0 0.0
## 79 37.375 111.375 3.000 14.4 0.0 11.8 11.3 5.5 0.0 0.0
## 80 26.600 108.000 1.300 6.7 0.0 5.4 3.4 4.9 6.9 10.8
## 81 38.750 56.667 2.000 10.8 0.0 0.0 4.6 6.5 2.2 1.4
## 82 10.800 60.000 4.300 1.2 0.0 1.7 0.0 7.5 17.7 14.4
## 83 27.667 104.000 21.000 12.6 4.3 21.9 1.0 2.4 3.3 22.1
## 84 32.000 69.930 3.100 14.7 4.1 1.0 0.0 7.7 8.5 31.2
## 85 111.250 214.000 2.900 3.3 0.0 0.0 5.0 1.9 6.2 25.6
## 86 137.600 254.600 4.300 0.0 0.0 0.0 4.6 9.0 13.1 30.1
## 87 57.600 169.001 3.200 2.8 0.0 0.0 2.6 5.2 13.2 16.7
## 88 412.333 607.167 4.300 0.0 0.0 2.6 2.4 5.0 0.0 2.4
## 89 282.167 624.733 6.800 0.0 0.0 0.0 1.0 35.6 9.9 0.0
## 90 197.833 303.333 40.000 0.0 15.2 8.8 0.0 8.6 5.1 2.7
## 91 267.750 391.750 3.500 0.0 5.5 3.3 0.0 20.8 12.4 0.0
## 92 191.750 265.250 7.300 0.0 2.1 1.6 0.0 20.8 32.9 0.0
## 93 120.500 232.833 31.000 1.2 5.6 6.3 1.7 1.2 0.0 1.0
## 94 144.667 244.000 9.000 0.0 3.1 3.5 1.6 8.2 9.9 0.0
## 95 159.500 218.000 6.500 0.0 5.2 0.0 0.0 28.8 20.4 1.0
## 96 23.000 138.500 20.829 5.7 0.0 0.0 4.4 12.4 8.3 7.8
## 97 104.222 239.000 72.478 3.6 31.9 2.4 0.0 0.0 0.0 2.2
## 98 97.333 235.667 98.817 1.2 16.2 0.0 0.0 0.0 0.0 1.0
## 99 51.750 205.875 2.000 4.0 2.1 35.1 6.8 7.3 0.0 0.0
## 100 31.333 211.667 21.900 5.9 3.4 1.0 1.2 17.8 49.4 1.0
## 101 28.625 186.500 30.000 16.5 2.1 19.5 3.5 5.3 1.2 3.2
## 102 12.889 154.125 5.200 7.0 0.0 13.5 4.3 8.7 0.0 4.3
## 103 51.111 183.667 17.200 58.7 0.0 11.5 6.6 0.0 0.0 0.0
## 104 104.500 292.625 3.000 8.7 0.0 3.0 5.3 9.4 33.2 0.0
## 105 38.625 285.714 75.000 17.0 21.6 1.6 1.4 10.2 3.6 1.1
## 106 24.667 201.778 3.000 12.3 5.4 1.9 0.0 1.4 0.0 1.9
## 107 54.000 275.143 65.700 8.8 19.6 4.7 0.0 0.0 0.0 2.7
## 108 39.000 124.200 13.100 23.7 13.7 0.0 1.7 6.4 2.6 0.0
## 109 60.833 141.833 25.000 0.0 6.4 7.3 12.7 0.0 0.0 4.2
## 110 71.000 132.546 15.000 3.6 38.8 0.0 0.0 1.2 0.0 2.4
## 111 9.667 17.333 1.000 64.3 1.5 8.0 0.0 0.0 0.0 0.0
## 112 6.500 26.000 0.300 46.6 0.0 2.5 0.0 0.0 0.0 0.0
## 113 8.692 16.662 2.100 24.0 0.0 1.0 0.0 0.0 0.0 0.0
## 114 37.857 102.571 1.200 3.7 1.4 1.1 2.1 3.2 6.4 0.0
## 115 37.000 86.997 3.000 18.1 14.5 0.0 0.0 11.5 22.3 0.0
## 116 22.444 10.111 NA 41.0 1.5 0.0 0.0 0.0 0.0 0.0
## 117 16.833 18.293 1.400 43.7 0.0 1.2 0.0 0.0 4.7 0.0
## 118 3.545 13.200 3.200 86.6 0.0 0.0 0.0 0.0 0.0 0.0
## 119 261.600 432.909 24.917 1.9 12.7 25.9 0.0 0.0 0.0 6.8
## 120 238.200 320.400 6.800 1.2 1.9 22.9 0.0 8.1 0.0 0.0
## 121 144.000 287.000 9.882 1.4 18.4 0.0 0.0 20.0 29.5 0.0
## 122 166.727 262.727 17.200 1.6 8.9 6.6 0.0 9.2 1.6 1.4
## 123 181.000 222.286 6.429 3.3 11.6 7.0 0.0 17.9 4.7 0.0
## 124 36.909 122.000 5.555 14.6 0.0 0.0 1.9 22.1 12.7 1.4
## 125 61.556 127.222 5.233 1.7 0.0 10.3 2.6 8.9 6.7 0.0
## 126 62.625 89.625 2.150 3.3 0.0 0.0 1.9 34.3 7.1 6.0
## 127 55.000 284.000 88.255 0.0 36.6 4.1 0.0 1.2 16.7 6.1
## 128 102.333 277.333 110.456 0.0 16.4 10.1 0.0 0.0 0.0 6.6
## 129 75.875 177.625 50.225 1.5 32.8 1.0 4.1 0.0 15.8 2.4
## 130 34.636 72.900 11.100 4.2 0.0 1.4 1.9 16.2 0.0 1.4
## 131 48.667 82.444 2.000 4.1 0.0 25.3 2.1 8.0 0.0 18.6
## 132 48.625 66.750 3.300 1.2 0.0 2.3 0.0 44.4 7.5 1.9
## 133 23.000 173.750 15.300 0.0 0.0 1.0 0.0 9.0 64.6 0.0
## 134 173.000 317.000 5.500 2.4 1.7 4.2 8.3 1.7 0.0 2.4
## 135 75.000 84.000 4.500 7.8 8.7 2.1 0.0 14.9 22.9 2.4
## 136 187.000 213.000 2.000 10.3 26.5 6.1 0.0 5.6 1.5 2.2
## 137 49.000 88.500 2.500 1.5 72.6 0.0 0.0 3.4 6.8 3.4
## 138 32.500 115.000 11.700 9.2 2.9 2.0 1.3 2.5 0.0 0.0
## 139 60.000 98.143 2.000 28.1 0.0 0.0 4.0 1.2 0.0 0.0
## 140 67.500 143.750 5.450 2.1 2.6 0.0 0.0 15.0 15.7 0.0
## 141 132.500 197.143 6.400 1.4 15.7 1.4 0.0 3.5 0.0 1.6
## 142 16.800 35.200 1.000 19.0 0.0 22.0 5.0 1.1 5.4 0.0
## 143 10.583 23.485 2.000 42.5 0.0 2.2 1.0 0.0 0.0 0.0
## 144 70.000 200.231 19.400 2.5 1.4 1.4 6.2 4.1 1.8 3.9
## 145 77.333 147.833 3.000 4.4 11.2 6.8 0.0 1.0 0.0 31.6
## 146 47.500 276.000 8.100 6.5 4.1 0.0 7.7 9.9 18.2 7.0
## 147 46.667 123.333 30.400 39.7 12.7 0.0 1.1 2.7 0.0 1.6
## 148 33.800 75.207 23.800 32.8 28.0 2.0 3.5 1.0 0.0 1.5
## 149 48.000 116.200 7.300 12.2 16.0 1.0 1.4 1.9 1.2 0.0
## 150 109.000 188.667 32.000 1.9 25.4 21.7 0.0 0.0 1.0 0.0
## 151 39.000 72.696 22.700 0.0 5.6 1.2 0.0 8.0 2.7 0.0
## 152 22.200 116.200 16.000 0.0 0.0 0.0 1.2 5.7 32.1 0.0
## 153 44.000 34.000 53.100 2.2 0.0 0.0 1.2 5.9 77.6 0.0
## 154 27.500 76.333 2.100 3.4 21.5 14.0 1.8 3.9 0.0 0.0
## 155 24.000 58.374 27.500 2.8 1.9 0.0 1.2 19.0 4.5 0.0
## 156 175.667 361.000 28.567 24.8 10.4 0.0 6.9 0.0 0.0 2.7
## 157 234.500 236.000 22.500 32.5 12.0 0.0 5.0 0.0 0.0 1.9
## 158 54.100 125.800 26.800 0.0 28.0 0.0 0.0 0.0 0.0 15.1
## 159 20.667 54.916 20.600 0.0 11.3 1.8 0.0 2.5 0.0 1.4
## 160 30.750 75.333 34.750 0.0 20.1 0.0 0.0 0.0 0.0 0.0
## 161 102.000 186.000 68.050 1.7 20.6 1.5 2.2 0.0 0.0 0.0
## 162 151.333 252.500 93.683 12.3 21.7 3.9 0.0 0.0 0.0 3.9
## 163 180.667 269.667 92.667 7.2 28.2 0.0 0.0 0.0 0.0 3.3
## 164 8.600 46.438 81.540 3.4 21.5 0.0 0.0 0.0 0.0 2.7
## 165 14.667 85.000 2.000 0.0 0.0 0.0 2.4 0.0 17.8 3.6
## 166 66.400 171.272 3.800 1.1 0.0 1.4 0.0 6.6 42.1 5.2
## 167 102.364 232.900 54.367 0.0 6.0 2.9 0.0 0.0 0.0 2.9
## 168 84.300 146.452 21.220 1.4 14.7 2.5 0.0 0.0 0.0 2.0
## 169 221.900 246.667 14.700 12.5 2.1 0.0 1.2 6.4 4.5 1.7
## 170 205.636 219.909 6.209 0.0 0.0 0.0 0.0 8.6 52.5 0.0
## 171 236.400 272.222 20.578 2.5 13.2 0.0 2.0 7.4 17.2 0.0
## 172 346.167 388.167 5.083 1.7 12.0 4.9 2.7 0.0 5.9 1.7
## 173 82.222 167.900 5.609 1.4 4.6 10.8 2.2 5.5 42.4 0.0
## 174 64.389 137.778 9.384 0.0 3.8 16.0 4.0 0.0 0.0 3.3
## 175 87.333 194.100 27.618 0.0 1.2 0.0 0.0 11.3 11.5 0.0
## 176 159.167 221.278 20.800 0.0 21.1 3.7 0.0 0.0 0.0 1.9
## 177 16.000 21.300 1.100 39.7 0.0 12.9 0.0 0.0 0.0 0.0
## 178 12.750 11.000 0.600 37.3 9.7 13.6 0.0 2.2 0.0 1.2
## 179 7.667 14.354 0.800 52.4 7.5 9.4 0.0 1.4 1.9 0.0
## 180 3.222 7.000 1.300 48.3 2.0 0.0 0.0 0.0 0.0 0.0
## 181 3.800 6.200 0.800 50.4 3.8 0.0 0.0 0.0 0.0 0.0
## 182 4.000 7.654 4.000 56.8 5.0 0.0 0.0 0.0 0.0 0.0
## 183 6.000 16.000 2.860 17.3 6.7 19.7 0.0 0.0 0.0 0.0
## 184 21.083 56.091 NA 16.8 19.6 4.0 0.0 0.0 0.0 0.0
## 185 26.625 52.875 2.000 18.1 1.7 2.0 0.0 1.7 5.9 0.0
## 186 104.727 228.364 46.075 1.1 3.9 2.1 0.0 3.9 4.6 2.3
## 187 41.300 85.400 17.491 0.0 4.7 0.0 0.0 2.6 2.6 0.0
## 188 51.125 87.125 14.775 0.0 12.0 1.7 0.0 2.7 0.0 0.0
## 189 45.900 101.455 18.330 1.7 7.0 1.2 0.0 4.8 3.1 0.0
## 190 50.889 127.000 24.556 0.0 0.0 10.2 1.7 1.2 0.0 5.5
## 191 34.500 81.558 5.620 7.6 0.0 1.2 0.0 15.9 31.8 5.9
## 192 19.727 50.455 8.155 2.9 4.6 1.0 0.0 6.6 16.6 0.0
## 193 19.111 120.889 5.111 2.2 12.7 8.8 0.0 0.0 0.0 1.2
## 194 51.111 91.111 22.900 3.8 22.0 2.9 0.0 3.1 5.5 0.0
## 195 19.111 61.444 6.167 18.9 13.2 5.0 0.0 6.1 0.0 0.0
## 196 53.625 79.750 2.338 12.7 21.7 5.6 0.0 1.0 0.0 0.0
## 197 35.333 75.904 4.667 18.0 7.0 1.7 0.0 4.8 10.3 1.0
## 198 78.333 140.220 31.738 0.0 15.9 2.4 1.0 0.0 0.0 0.0
## 200 64.000 140.517 18.300 2.4 10.5 9.0 7.8 0.0 0.0 5.8
dim(ds)
## [1] 198 18
names(ds)
## [1] "temporada" "tamano" "velocidad" "mxPH" "mnO2"
## [6] "Cl" "NO3" "NO4" "oPO4" "PO4"
## [11] "Chla" "a1" "a2" "a3" "a4"
## [16] "a5" "a6" "a7"
str(ds)
## 'data.frame': 198 obs. of 18 variables:
## $ temporada: Factor w/ 4 levels "autumn","spring",..: 4 2 1 2 1 4 3 1 4 4 ...
## $ tamano : Factor w/ 3 levels "large","medium",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ velocidad: Factor w/ 3 levels "high","low","medium": 3 3 3 3 3 1 1 1 3 1 ...
## $ mxPH : num 8 8.35 8.1 8.07 8.06 8.25 8.15 8.05 8.7 7.93 ...
## $ mnO2 : num 9.8 8 11.4 4.8 9 13.1 10.3 10.6 3.4 9.9 ...
## $ Cl : num 60.8 57.8 40 77.4 55.4 ...
## $ NO3 : num 6.24 1.29 5.33 2.3 10.42 ...
## $ NO4 : num 578 370 346.7 98.2 233.7 ...
## $ oPO4 : num 105 428.8 125.7 61.2 58.2 ...
## $ PO4 : num 170 558.8 187.1 138.7 97.6 ...
## $ Chla : num 50 1.3 15.6 1.4 10.5 ...
## $ a1 : num 0 1.4 3.3 3.1 9.2 15.1 2.4 18.2 25.4 17 ...
## $ a2 : num 0 7.6 53.6 41 2.9 14.6 1.2 1.6 5.4 0 ...
## $ a3 : num 0 4.8 1.9 18.9 7.5 1.4 3.2 0 2.5 0 ...
## $ a4 : num 0 1.9 0 0 0 0 3.9 0 0 2.9 ...
## $ a5 : num 34.2 6.7 0 1.4 7.5 22.5 5.8 5.5 0 0 ...
## $ a6 : num 8.3 0 0 0 4.1 12.6 6.8 8.7 0 0 ...
## $ a7 : num 0 2.1 9.7 1.4 1 2.9 0 0 0 1.7 ...
NOTA: Indicar si hay una discrepancia entre las clases de las variables en el data set y su significado, p. ej. fechas que no son fechas sino factores, etc.
No hay discrepancias porque las primeras tres variables tienen los factores correctos y las últimas variables son todas numéricas.
head(ds)
## temporada tamano velocidad mxPH mnO2 Cl NO3 NO4 oPO4
## 1 winter small medium 8.00 9.8 60.800 6.238 578.000 105.000
## 2 spring small medium 8.35 8.0 57.750 1.288 370.000 428.750
## 3 autumn small medium 8.10 11.4 40.020 5.330 346.667 125.667
## 4 spring small medium 8.07 4.8 77.364 2.302 98.182 61.182
## 5 autumn small medium 8.06 9.0 55.350 10.416 233.700 58.222
## 6 winter small high 8.25 13.1 65.750 9.248 430.000 18.250
## PO4 Chla a1 a2 a3 a4 a5 a6 a7
## 1 170.000 50.0 0.0 0.0 0.0 0.0 34.2 8.3 0.0
## 2 558.750 1.3 1.4 7.6 4.8 1.9 6.7 0.0 2.1
## 3 187.057 15.6 3.3 53.6 1.9 0.0 0.0 0.0 9.7
## 4 138.700 1.4 3.1 41.0 18.9 0.0 1.4 0.0 1.4
## 5 97.580 10.5 9.2 2.9 7.5 0.0 7.5 4.1 1.0
## 6 56.667 28.4 15.1 14.6 1.4 0.0 22.5 12.6 2.9
tail(ds)
## temporada tamano velocidad mxPH mnO2 Cl NO3 NO4 oPO4
## 194 autumn large medium 8.58 11.1 23.825 3.617 72.600 51.111
## 195 summer large medium 8.50 7.9 12.444 2.586 96.667 19.111
## 196 autumn large medium 8.40 8.4 17.375 3.833 83.750 53.625
## 197 spring large medium 8.30 10.6 14.320 3.200 125.333 35.333
## 198 autumn large medium 8.20 7.0 139.989 2.978 60.110 78.333
## 200 summer large medium 8.50 6.7 82.852 2.800 27.069 64.000
## PO4 Chla a1 a2 a3 a4 a5 a6 a7
## 194 91.111 22.900 3.8 22.0 2.9 0.0 3.1 5.5 0.0
## 195 61.444 6.167 18.9 13.2 5.0 0.0 6.1 0.0 0.0
## 196 79.750 2.338 12.7 21.7 5.6 0.0 1.0 0.0 0.0
## 197 75.904 4.667 18.0 7.0 1.7 0.0 4.8 10.3 1.0
## 198 140.220 31.738 0.0 15.9 2.4 1.0 0.0 0.0 0.0
## 200 140.517 18.300 2.4 10.5 9.0 7.8 0.0 0.0 5.8
# Show 6 randomly chosen rows of ds. Row positions are drawn from
# 1..nrow(ds); sampling ds itself would pick columns instead of rows.
ds[sample(nrow(ds), 6), ]
summary(ds)
## temporada tamano velocidad mxPH mnO2
## autumn:40 large :44 high :84 Min. :5.60 Min. : 1.500
## spring:53 medium:84 low :33 1st Qu.:7.70 1st Qu.: 7.800
## summer:44 small :70 medium:81 Median :8.06 Median : 9.800
## winter:61 Mean :8.02 Mean : 9.125
## 3rd Qu.:8.40 3rd Qu.:10.800
## Max. :9.70 Max. :13.400
## NA's :1 NA's :1
## Cl NO3 NO4 oPO4
## Min. : 0.222 Min. : 0.050 Min. : 5.00 Min. : 1.00
## 1st Qu.: 10.981 1st Qu.: 1.296 1st Qu.: 38.33 1st Qu.: 15.70
## Median : 32.730 Median : 2.675 Median : 103.17 Median : 40.15
## Mean : 43.636 Mean : 3.282 Mean : 501.30 Mean : 73.59
## 3rd Qu.: 57.824 3rd Qu.: 4.446 3rd Qu.: 226.95 3rd Qu.: 99.33
## Max. :391.500 Max. :45.650 Max. :24064.00 Max. :564.60
## NA's :8
## PO4 Chla a1 a2
## Min. : 1.0 Min. : 0.200 Min. : 0.000 Min. : 0.000
## 1st Qu.: 43.5 1st Qu.: 2.000 1st Qu.: 1.525 1st Qu.: 0.000
## Median :104.0 Median : 5.475 Median : 6.950 Median : 3.000
## Mean :138.5 Mean : 13.971 Mean :16.996 Mean : 7.471
## 3rd Qu.:214.0 3rd Qu.: 18.308 3rd Qu.:24.800 3rd Qu.:11.275
## Max. :771.6 Max. :110.456 Max. :89.800 Max. :72.600
## NA's :1 NA's :10
## a3 a4 a5 a6
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.550 Median : 0.000 Median : 2.000 Median : 0.000
## Mean : 4.334 Mean : 1.997 Mean : 5.116 Mean : 6.005
## 3rd Qu.: 4.975 3rd Qu.: 2.400 3rd Qu.: 7.500 3rd Qu.: 6.975
## Max. :42.800 Max. :44.600 Max. :44.400 Max. :77.600
##
## a7
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 1.000
## Mean : 2.487
## 3rd Qu.: 2.400
## Max. :31.600
##
library(stringr)
# Reuse the normalizarNombres() function written as an earlier exercise
# (defined outside this section) to rewrite the column names of ds.
names(ds) <- normalizarNombres(names(ds))
Además de normalizar los nombres de variables, este es el lugar para poner nombres que tengan significado como que la columna que tenga datos de fecha, se llame fecha o date.
names(ds)
## [1] "temporada" "tamano" "velocidad" "mx.pH" "mn.o2"
## [6] "Cl" "NO3" "NO4" "o.pO4" "PO4"
## [11] "Chla" "a1" "a2" "a3" "a4"
## [16] "a5" "a6" "a7"
Las clases de las variables son
sapply(ds, class)
## temporada tamano velocidad mx.pH mn.o2 Cl NO3
## "factor" "factor" "factor" "numeric" "numeric" "numeric" "numeric"
## NO4 o.pO4 PO4 Chla a1 a2 a3
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## a4 a5 a6 a7
## "numeric" "numeric" "numeric" "numeric"
En esta sección arreglamos los formatos de los datos. Un ejemplo típico son las fechas.
Otros problemas con variables son: categóricas/numéricas que no lo son, booleanas que no lo son, ordenar variables nominales, reetiquetar las variables categóricas, etc.
Para arreglar las fechas, utiliza el paquete lubridate.
El formato de fechas debe de ser YMD y si es timestamp debe de serlo hasta la precisión que den los datos, no más, no menos.
# Hypothetical example: convert a `fecha` column to text and parse it with ymd().
# NOTE(review): the names(ds) output shown earlier lists no `fecha` column, so this is illustrative only.
ds$fecha <- ymd(as.character(ds$fecha))
NOTA: Es recomendable hacer todas las transformaciones en un solo mutate y no una por una (a menos que haya problemas de memoria, y hay que usar otras técnicas).
Así quedan las variables corregidas:
sapply(ds, class)
## temporada tamano velocidad mx.pH mn.o2 Cl NO3
## "factor" "factor" "factor" "numeric" "numeric" "numeric" "numeric"
## NO4 o.pO4 PO4 Chla a1 a2 a3
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## a4 a5 a6 a7
## "numeric" "numeric" "numeric" "numeric"
En esta sección incluimos la transformación de las variables necesarias (normalización, estandarización, binning, log, etc.)
NOTA: Es recomendable hacer todas las transformaciones en un solo mutate y no una por una (a menos que haya problemas de memoria, en cuyo caso hay que usar otras técnicas).
# Identificación de variables
vars <- names(ds) # Keep the variable names
target <- "" # Target variable, if the model is supervised
risk <- "" # If provided, the importance of the observation with respect to the variable (an output variable)
costo <- "" # Cost of a wrong prediction, if provided (an output variable)
id <- "" # Build an id from columns, or pick the dataset's id column
Antes de pasar a la etapa de ignorar variables, es importante recodificar.
Hay métodos, como randomForest, que no soportan variables categóricas con más de 32 niveles; habría que agruparlos (p. ej., si son países se pueden reagrupar por región o por similitud —esto requiere otra base de datos—, etc.)
Si las fechas son timestamp hay que extraer variables categóricas como mes, día.de.la.semana, fin.de.semana, temporada, etc. claro que depende del tipo del problema al que nos estemos enfrentando.
Identificamos en una variable, las columnas a ignorar en el entrenamiento del modelo.
# Names of the columns to leave out when training the model.
# Start from the id, risk and cost columns. union() accepts exactly two sets,
# so the names are combined with c() and de-duplicated instead; the empty
# placeholders ("") are dropped because they name no column.
vars.a.ignorar <- unique(c(id, if (exists("risk")) risk, if (exists("costo")) costo))
vars.a.ignorar <- vars.a.ignorar[nzchar(vars.a.ignorar)]
# Ignore columns that have a distinct value for every observation; they may be IDs
# IMPORTANT: this can also drop date columns, see the previous section
ids <- names(which(sapply(ds, function(x) length(unique(x)) == nrow(ds))))
# Ignore factors that have many levels
# IMPORTANT: see the previous section
factors <- which(sapply(ds[vars], is.factor))
niveles <- sapply(factors, function(x) length(levels(ds[[x]])))
(muchos.niveles <- names(which(niveles > 20)))
vars.a.ignorar <- union(vars.a.ignorar, muchos.niveles)
# Constants: columns whose values all equal the first one
constantes <- names(which(sapply(ds[vars], function(x) all(x == x[1L]))))
# Fold the three sets together pairwise, since union() takes two arguments
vars.a.ignorar <- Reduce(union, list(vars.a.ignorar, ids, constantes))
# Columns that are entirely NA
ids.nas.count <- sapply(ds[vars], function(x) sum(is.na(x)))
ids.nas <- names(which(ids.nas.count == nrow(ds)))
# (left disabled, as in the original)
#vars.a.ignorar <- union(ids.nas, vars.a.ignorar)
# Columns with many NAs (70% or more)
ids.many.nas <- names(which(ids.nas.count >= 0.7*nrow(ds)))
# (left disabled, as in the original)
#vars.a.ignorar <- union(ids.many.nas, vars.a.ignorar)
Variable objetivo (target): si el problema de minado es supervisado, removemos las observaciones que tengan NA en la variable target.
# Dimensions before removing rows
dim(ds)
# Keep only the rows whose target column is not NA.
# NOTE(review): `target` is set to "" earlier in this document; it has to be
# replaced with a real column name of ds before this line can run.
ds <- ds[!is.na(ds[target]),]
# Dimensions after removing rows
dim(ds)
Si el problema es de clasificación, hay que convertir la variable target a categórica.
ds[target] <- as.factor(ds[[target]])
table(ds[target])
Mostramos la distribución (esto nos indicará si el problema no está balanceado)
ggplot(data=ds, aes_string(x=target)) + geom_bar(width=0.3)
# Correlation matrix of the numeric columns, using only complete rows
vars.cor <- cor(ds[which(sapply(ds, is.numeric))], use="complete.obs")
# Blank out the upper triangle and the diagonal so each pair appears once
vars.cor[upper.tri(vars.cor, diag=TRUE)] <- NA
# Reshape to long format: one row per (var1, var2) pair with its absolute
# correlation; the blanked cells are removed by na.omit().
# row.names(vars.cor) is read from the matrix, which is still bound to
# vars.cor while the pipeline is being evaluated.
vars.cor <- vars.cor %>%
abs() %>%
data.frame() %>%
mutate(var1=row.names(vars.cor)) %>%
gather(var2, cor, -var1) %>%
na.omit()
# Sort pairs from the strongest to the weakest correlation
vars.cor <- vars.cor[order(-abs(vars.cor$cor)), ]
(muy.cor <- filter(vars.cor, cor > 0.95)) # Show the pairs whose absolute correlation is above 0.95
## [1] var1 var2 cor
## <0 rows> (or 0-length row.names)
# Whether highly correlated variables are removed, and which member of each
# pair (var1 or var2), is still a modelling decision.
# Append var2 of each pair to the ignore list instead of overwriting it, so
# variables flagged earlier are kept. exists() covers the case where the
# list has not been created yet; as.character() guards against var2 being a
# factor.
vars.a.ignorar <- union(if (exists("vars.a.ignorar")) vars.a.ignorar, as.character(muy.cor$var2))
NOTA: ¿Qué pasa con las categóricas? ¿Usamos asociación o independencia?
En esta sección hay que poner la estrategia de manejo de valores faltantes elegida durante la etapa del EDA.
ListaCategoricas <- c("temporada","tamano","velocidad")
# Most frequent value of a vector.
#
# x: an atomic vector or a factor.
#
# Returns the element of unique(x) with the highest count; ties go to the
# value that appears first in x. NA is counted like any other value, so it
# is returned when it is the most frequent entry.
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

# Fill in the missing values of the selected columns of a data frame.
#
# data:     a data frame.
# colnames: character vector with the names of the columns to impute.
#
# Columns whose class is exactly "numeric" receive random draws from a normal
# distribution with the mean and standard deviation of the observed values,
# so results vary between runs unless a seed is set. Every other column
# receives the most frequent observed value (see Mode()).
# Columns with no missing values, or with no observed values at all, are
# left untouched.
#
# Returns the data frame with the imputed columns. The original version ended
# on the for loop and therefore returned NULL, discarding the imputation.
imputarValorCentral <- function(data, colnames) {
  for (col in colnames) {
    valores <- data[[col]]
    faltan <- is.na(valores)
    # Nothing to fill, or nothing observed to estimate from
    if (!any(faltan) || all(faltan)) next
    observados <- valores[!faltan]
    if (identical(class(valores), "numeric")) {
      desv <- sd(observados)
      # sd() is NA with a single observed value; fall back to the mean alone
      if (is.na(desv)) desv <- 0
      valores[faltan] <- rnorm(sum(faltan), mean(observados), desv)
    } else {
      # Use observed values only, so NA can never be chosen as the mode
      valores[faltan] <- Mode(observados)
    }
    data[[col]] <- valores
  }
  data
}
# Scatter plot of PO4 against o.pO4 with a fitted straight line (lm) and no
# confidence band.
ggplot(data=ds) +
aes(x=o.pO4, y=PO4) +
geom_point(shape=1) + # Use a small circle for the points
geom_smooth(method=lm, se=FALSE)
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# Logical mask of the rows where PO4 is missing
na.ind <- is.na(ds$PO4)
# Linear regression of PO4 on o.pO4
modelo <- lm(PO4 ~ o.pO4, data=ds)
summary(modelo)
##
## Call:
## lm(formula = PO4 ~ o.pO4, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -110.12 -36.34 -12.68 23.26 216.98
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.897 4.808 8.922 3.34e-16 ***
## o.pO4 1.293 0.041 31.535 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 52.37 on 195 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.8361, Adjusted R-squared: 0.8352
## F-statistic: 994.5 on 1 and 195 DF, p-value: < 2.2e-16
# Predict PO4 from the fitted model for the rows flagged in na.ind ...
pred <- predict.lm(modelo, ds[na.ind,])
# ... and write the predictions into the PO4 column of those rows
ds[na.ind,'PO4'] <- pred
Hay muy pocas ocasiones donde es recomendable dejar que el modelo se encargue de las imputaciones.
Las observaciones a omitir, guárdalas en observaciones.omitidas.
Removemos espacios, puntuaciones, camelCase, etc. en los niveles de los factores supervivientes.
# Positions, within ds[vars], of the columns that are factors
factors <- which(sapply(ds[vars], is.factor))
# Rewrite the level labels of each factor with normalizarNombres()
# (defined outside this section)
for (f in factors) levels(ds[[f]]) <- normalizarNombres(levels(ds[[f]]))
# Input variables: every name in vars except the target; the outer
# parentheses also print the result
(vars.input <- setdiff(vars, target))
## [1] "temporada" "tamano" "velocidad" "mx.pH" "mn.o2"
## [6] "Cl" "NO3" "NO4" "o.pO4" "PO4"
## [11] "Chla" "a1" "a2" "a3" "a4"
## [16] "a5" "a6" "a7"
# Column position in ds of each input variable
idxs.input <- sapply(vars.input, function(x) which(x == names(ds)), USE.NAMES=FALSE)
# Keep the positions whose column is numeric
idxs.numericas <- intersect(idxs.input, which(sapply(ds, is.numeric)))
# Names of the numeric input variables (printed)
(vars.numericas <- names(ds)[idxs.numericas])
## [1] "mx.pH" "mn.o2" "Cl" "NO3" "NO4" "o.pO4" "PO4" "Chla"
## [9] "a1" "a2" "a3" "a4" "a5" "a6" "a7"
# Positions of the input variables whose column is a factor
idxs.categoricas <- intersect(idxs.input, which(sapply(ds, is.factor)))
# Names of the categorical input variables (printed)
(vars.categoricas <- names(ds)[idxs.categoricas])
## [1] "temporada" "tamano" "velocidad"
# For convenience, store the number of surviving observations
num.observaciones <- nrow(ds)
## R version 3.1.1 (2014-07-10)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
##
## locale:
## [1] es_ES.UTF-8/es_ES.UTF-8/es_ES.UTF-8/C/es_ES.UTF-8/es_ES.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rattle_3.3.0 RColorBrewer_1.0-5 arm_1.7-07
## [4] lme4_1.1-7 Rcpp_0.11.3 Matrix_1.1-4
## [7] MASS_7.3-35 scales_0.2.4 reshape2_1.4
## [10] plyr_1.8.1 stringr_0.6.2 lubridate_1.3.3
## [13] ggplot2_1.0.0 tidyr_0.1 dplyr_0.3.0.2
##
## loaded via a namespace (and not attached):
## [1] abind_1.4-0 assertthat_0.1 coda_0.16-1 colorspace_1.2-4
## [5] DBI_0.3.1 digest_0.6.4 evaluate_0.5.5 formatR_1.0
## [9] grid_3.1.1 gtable_0.1.2 htmltools_0.2.6 knitr_1.7
## [13] labeling_0.3 lattice_0.20-29 lazyeval_0.1.9 magrittr_1.0.1
## [17] memoise_0.2.1 minqa_1.2.4 munsell_0.4.2 nlme_3.1-118
## [21] nloptr_1.0.4 parallel_3.1.1 proto_0.3-10 rmarkdown_0.3.10
## [25] splines_3.1.1 tools_3.1.1 yaml_2.1.13